#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Bootstrap the session by running two helper scripts in this namespace.
# Every name used further down that is not defined in this file presumably
# comes from them (P_Lib is expected to be set by init_path.py) -- TODO confirm.
# NOTE(review): exec() runs these files with full privileges; they must be
# trusted project files.
with open('init_path.py') as fh:  # context manager so the handle is closed
    exec(fh.read())
exec((P_Lib/'GasStation.py').read_text())
# Render matplotlib figures inline; get_ipython is not defined in this file, so
# this presumably requires running inside an IPython/Jupyter session.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[4]:

# In[5]:

fuels = ['e5','e10','diesel']

# Whitelist of day identifiers to keep (per the pickle's name: weekday,
# non-holiday dates as YMD strings -- TODO confirm). It does not depend on the
# fuel, so it is loaded once instead of once per loop iteration.
ls_strYMD_weekday_nonholiday = load_obj(P_GS_Data / 'GS' / 'ls_strYMD_weekday_nonholiday.pkl')
# Set copy for O(1) membership tests while filtering files.
# Assumes the pickle holds a plain list of hashable strings -- verify.
set_strYMD_weekday_nonholiday = frozenset(ls_strYMD_weekday_nonholiday)


def calc_AvgP_WA(f): # v - 2020-10-29
    """Return the duration-weighted average price per station for one file.

    Reads the 'GS' table from the HDF5 file ``f``. For every ``StID`` each
    price observation is weighted by the time (in hours) until that station's
    next observation, and the weighted mean is returned.

    The price column is taken from the module-level variable ``fuel``, which
    the driver loop below sets before it creates the worker pool.
    NOTE(review): worker processes only see the right ``fuel`` if they inherit
    the parent's globals at pool creation (fork start method) -- confirm for
    the platform this runs on.

    Parameters
    ----------
    f : path-like object with a ``.name`` attribute
        File to read; the text after the last '_' in its name (extension
        removed) becomes the name of the returned Series.

    Returns
    -------
    Series indexed by StID holding sum(price * duration) / sum(duration).
    """
    df = read_hdf(f, 'GS')
    ymd = f.name.split('.')[0].split('_')[-1]
    # Timestamp of the same station's next row; NaT on each station's last row.
    df['TimeL1'] = df.groupby('StID').Time.shift(-1)
    # Hours between a row and the station's next row.
    # NOTE(review): `.dt.seconds` is only the seconds component of the
    # timedelta (whole days and sub-second parts are dropped). That is
    # harmless while every gap is shorter than one day; otherwise
    # `.dt.total_seconds()` would be the correct accessor.
    df['Duration'] = (df.TimeL1 - df.Time).dt.seconds / 3600
    # Drop unavailable prices (<= 0) and rows without a duration (each
    # station's last row): grouped sums skip NaN, i.e. treat it as zero.
    df = df[(df[fuel]>0) & df.Duration.notnull()].copy()
    df['PTotal'] = df[fuel] * df.Duration
    gped = df.groupby('StID')
    AvgP = gped.PTotal.sum() / gped.Duration.sum()
    AvgP.name = ymd
    return AvgP


for fuel in fuels:

    # # 7-21 Version
    # Daily .h5 files of this fuel, restricted to whitelisted days and ordered
    # by the integer token that ends the file name.
    files = (P_GS_Data_Raw / ('PH_FullDay_7to21_'+fuel)).glob('*.h5')
    files = [f for f in files if f.stem in set_strYMD_weekday_nonholiday]
    files = sorted(files, key=lambda f: int(f.name.split('.')[0].split('_')[-1]))
    print(len(files))

    # One task per file, spread over 8 worker processes.
    with Pool(8) as p:
        ls = p.map(calc_AvgP_WA, files)

    # Combine the per-day Series (one column per day) and reshape to long
    # format: one row per (StID, YMD) pair that has a value.
    df = concat(ls, axis=1, sort=False).stack().reset_index()
    df.columns = ['StID','YMD','AvgP']
    df['YMD'] = df.YMD.astype(int)

    df.to_stata(P_GS_Data / 'Price' / ('Daily_AvgP_WA_7to21_'+fuel+'.dta'))

# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:




